// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// package bufio -- go2cs converted at 2022 March 13 05:28:48 UTC
// import "bufio" ==> using bufio = go.bufio_package
// Original source: C:\Program Files\Go\src\bufio\scan.go
namespace go;

using bytes = bytes_package;
using errors = errors_package;
using io = io_package;
using utf8 = unicode.utf8_package;


// Scanner provides a convenient interface for reading data such as
// a file of newline-delimited lines of text. Successive calls to
// the Scan method will step through the 'tokens' of a file, skipping
// the bytes between the tokens. The specification of a token is
// defined by a split function of type SplitFunc; the default split
// function breaks the input into lines with line termination stripped. Split
// functions are defined in this package for scanning a file into
// lines, bytes, UTF-8-encoded runes, and space-delimited words. The
// client may instead provide a custom split function.
//
// Scanning stops unrecoverably at EOF, the first I/O error, or a token too
// large to fit in the buffer. When a scan stops, the reader may have
// advanced arbitrarily far past the last token. Programs that need more
// control over error handling or large tokens, or must run sequential scans
// on a reader, should use bufio.Reader instead.
//

public static partial class bufio_package {

// Scanner holds the state of a tokenizing scan over an io.Reader: the
// reader and split function, the working buffer with its start/end
// offsets, the most recent token, and the sticky error and bookkeeping
// flags consulted by Scan, Buffer and Split.
public partial struct Scanner {
    public io.Reader r; // The reader provided by the client.
    public SplitFunc split; // The function to split the tokens.
    public nint maxTokenSize; // Maximum size of a token; modified by tests.
    public slice<byte> token; // Last token returned by split.
    public slice<byte> buf; // Buffer used as argument to split.
    public nint start; // First non-processed byte in buf.
    public nint end; // End of data in buf.
    public error err; // Sticky error.
    public nint empties; // Count of successive empty tokens.
    public bool scanCalled; // Scan has been called; buffer is in use.
    public bool done; // Scan has finished.
}

// SplitFunc is the signature of the split function used to tokenize the
// input. The arguments are an initial substring of the remaining unprocessed
// data and a flag, atEOF, that reports whether the Reader has no more data
// to give. The return values are the number of bytes to advance the input
// and the next token to return to the user, if any, plus an error, if any.
//
// Scanning stops if the function returns an error, in which case some of
// the input may be discarded. If that error is ErrFinalToken, scanning
// stops with no error.
//
// Otherwise, the Scanner advances the input. If the token is not nil,
// the Scanner returns it to the user. If the token is nil, the
// Scanner reads more data and continues scanning; if there is no more
// data--if atEOF was true--the Scanner returns. If the data does not
// yet hold a complete token, for instance if it has no newline while
// scanning lines, a SplitFunc can return (0, nil, nil) to signal the
// Scanner to read more data into the slice and try again with a
// longer slice starting at the same point in the input.
//
// The function is never called with an empty data slice unless atEOF
// is true. If atEOF is true, however, data may be non-empty and,
// as always, holds unprocessed text.
//
// The returned tuple is (advance, token, err), matching the shape of the
// ScanBytes, ScanRunes, ScanLines and ScanWords split functions.
public delegate (nint, slice<byte>, error) SplitFunc(slice<byte> data, bool atEOF);

// Errors returned by Scanner.

// ErrTooLong is recorded by Scan when the buffer is full and may not grow any further.
public static error ErrTooLong = errors.New("bufio.Scanner: token too long");

// ErrNegativeAdvance is recorded when the split function asks to advance by a negative count.
public static error ErrNegativeAdvance = errors.New("bufio.Scanner: SplitFunc returns negative advance count");

// ErrAdvanceTooFar is recorded when the split function asks to advance past the buffered data.
public static error ErrAdvanceTooFar = errors.New("bufio.Scanner: SplitFunc returns advance count beyond input");

// ErrBadReadCount is recorded when the reader reports a negative count or more bytes than were requested.
public static error ErrBadReadCount = errors.New("bufio.Scanner: Read returned impossible count");

 
// MaxScanTokenSize is the maximum size used to buffer a token
// unless the user provides an explicit buffer with Scanner.Buffer.
// The actual maximum token size may be smaller as the buffer
// may need to include, for instance, a newline.
// NewScanner installs this value as the scanner's initial maxTokenSize.
public static readonly nint MaxScanTokenSize = 64 * 1024;

// Scan uses this size the first time it has to grow an empty buffer,
// still subject to the scanner's maxTokenSize cap.
private static readonly nint startBufSize = 4096; // Size of initial allocation for buffer.

// NewScanner returns a new Scanner to read from r.
// The split function defaults to ScanLines and the maximum token size
// to MaxScanTokenSize; every other field keeps its zero value.
public static ptr<Scanner> NewScanner(io.Reader r) {
    return addr(new Scanner(r: r, split: ScanLines, maxTokenSize: MaxScanTokenSize));
}

// Err returns the first non-EOF error that was encountered by the Scanner.
// It is public so that code outside this class can call it on a scanner
// obtained from NewScanner.
public static error Err(this ptr<Scanner> _addr_s) {
    ref Scanner s = ref _addr_s.val;

    // io.EOF marks the normal end of input, so it is reported as "no error".
    if (s.err == io.EOF) {
        return error.As(null!)!;
    }
    return error.As(s.err)!;
}

// Bytes returns the most recent token generated by a call to Scan.
// The underlying array may point to data that will be overwritten
// by a subsequent call to Scan. It does no allocation.
// It is public so that code outside this class can read the token.
public static slice<byte> Bytes(this ptr<Scanner> _addr_s) {
    ref Scanner s = ref _addr_s.val;

    return s.token;
}

// Text returns the most recent token generated by a call to Scan
// as a newly allocated string holding its bytes.
// It is public so that code outside this class can read the token.
public static @string Text(this ptr<Scanner> _addr_s) {
    ref Scanner s = ref _addr_s.val;

    // NOTE(review): string(...) mirrors Go's conversion syntax; confirm the
    // go2cs runtime exposes a matching slice<byte> -> @string conversion.
    return string(s.token);
}

// ErrFinalToken is a special sentinel error value. It is intended to be
// returned by a Split function to indicate that the token being delivered
// with the error is the last token and scanning should stop after this one.
// After ErrFinalToken is received by Scan, scanning stops with no error.
// The value is useful to stop processing early or when it is necessary to
// deliver a final empty token. One could achieve the same behavior
// with a custom error value but providing one here is tidier.
// See the emptyFinalToken example for a use of this value.
public static error ErrFinalToken = errors.New("final token");

// Scan advances the Scanner to the next token, which will then be
// available through the Bytes or Text method. It returns false when the
// scan stops, either by reaching the end of the input or an error.
// After Scan returns false, the Err method will return any error that
// occurred during scanning, except that if it was io.EOF, Err
// will return nil.
// Scan panics if the split function returns too many empty
// tokens without advancing the input. This is a common error mode for
// scanners.
public static bool Scan(this ptr<Scanner> _addr_s) => func((_, panic, _) => {
    ref Scanner s = ref _addr_s.val;

    if (s.done) {
        return false;
    }
    s.scanCalled = true;
    // Loop until we have a token.
    while (true) {
        // See if we can get a token with what we already have.
        // If we've run out of data but have an error, give the split function
        // a chance to recover any remaining, possibly empty token.
        if (s.end > s.start || s.err != null) {
            var (advance, token, err) = s.split(s.buf[(int)s.start..(int)s.end], s.err != null);
            if (err != null) {
                if (err == ErrFinalToken) {
                    // Deliver the final token and make every later call return false.
                    s.token = token;
                    s.done = true;
                    return true;
                }
                s.setErr(err);
                return false;
            }
            if (!s.advance(advance)) {
                return false;
            }
            s.token = token;
            if (token != null) {
                if (s.err == null || advance > 0) {
                    s.empties = 0;
                }
                else {
                    // Returning tokens not advancing input at EOF.
                    s.empties++;
                    if (s.empties > maxConsecutiveEmptyReads) {
                        panic("bufio.Scan: too many empty tokens without progressing");
                    }
                }
                return true;
            }
        }
        // We cannot generate a token with what we are holding.
        // If we've already hit EOF or an I/O error, we are done.
        if (s.err != null) {
            // Shut it down.
            s.start = 0;
            s.end = 0;
            return false;
        }
        // Must read more data.
        // First, shift data to beginning of buffer if there's lots of empty space
        // or space is needed.
        if (s.start > 0 && (s.end == len(s.buf) || s.start > len(s.buf) / 2)) {
            copy(s.buf, s.buf[(int)s.start..(int)s.end]);
            s.end -= s.start;
            s.start = 0;
        }
        // Is the buffer full? If so, resize.
        if (s.end == len(s.buf)) {
            // Guarantee no overflow in the multiplication below.
            nint maxInt = nint.MaxValue;

            if (len(s.buf) >= s.maxTokenSize || len(s.buf) > maxInt / 2) {
                s.setErr(ErrTooLong);
                return false;
            }
            var newSize = len(s.buf) * 2;
            if (newSize == 0) {
                newSize = startBufSize;
            }
            if (newSize > s.maxTokenSize) {
                newSize = s.maxTokenSize;
            }
            var newBuf = make_slice<byte>(newSize);
            copy(newBuf, s.buf[(int)s.start..(int)s.end]);
            s.buf = newBuf;
            s.end -= s.start;
            s.start = 0;
        }
        // Finally we can read some input. Make sure we don't get stuck with
        // a misbehaving Reader: every exit from this inner loop is an explicit break.
        {
            nint loop = 0;

            while (true) {
                var (n, err) = s.r.Read(s.buf[(int)s.end..(int)len(s.buf)]);
                if (n < 0 || len(s.buf) - s.end < n) {
                    s.setErr(ErrBadReadCount);
                    break;
                }
                s.end += n;
                if (err != null) {
                    s.setErr(err);
                    break;
                }
                if (n > 0) {
                    s.empties = 0;
                    break;
                }
                loop++;
                if (loop > maxConsecutiveEmptyReads) {
                    s.setErr(io.ErrNoProgress);
                    break;
                }
            }

        }
    }
});

// advance moves the scanner's start offset forward by n bytes.
// It returns true when n lies within the unprocessed part of the buffer;
// otherwise it records ErrNegativeAdvance or ErrAdvanceTooFar and returns false.
private static bool advance(this ptr<Scanner> _addr_s, nint n) {
    ref Scanner s = ref _addr_s.val;

    nint unprocessed = s.end - s.start;
    if (n >= 0 && n <= unprocessed) {
        s.start += n;
        return true;
    }
    // A negative count is reported in preference to an over-long one.
    s.setErr(n < 0 ? ErrNegativeAdvance : ErrAdvanceTooFar);
    return false;
}

// setErr stores err as the scanner's sticky error unless a real
// (non-nil, non-EOF) error has already been recorded.
private static void setErr(this ptr<Scanner> _addr_s, error err) {
    ref Scanner s = ref _addr_s.val;

    if (s.err != null && s.err != io.EOF) {
        // Keep the earlier error; only nil or io.EOF may be replaced.
        return;
    }
    s.err = err;
}

// Buffer sets the initial buffer to use when scanning and the maximum
// size of buffer that may be allocated during scanning. The maximum
// token size is the larger of max and cap(buf). If max <= cap(buf),
// Scan will use this buffer only and do no allocation.
//
// By default, Scan uses an internal buffer and sets the
// maximum token size to MaxScanTokenSize.
//
// Buffer panics if it is called after scanning has started.
// It is public so that code outside this class can configure a scanner.
public static void Buffer(this ptr<Scanner> _addr_s, slice<byte> buf, nint max) => func((_, panic, _) => {
    ref Scanner s = ref _addr_s.val;

    if (s.scanCalled) {
        panic("Buffer called after Scan");
    }
    // Expose the whole capacity of the supplied slice to the scanner.
    s.buf = buf[(int)0..(int)cap(buf)];
    s.maxTokenSize = max;
});

// Split sets the split function for the Scanner.
// The default split function is ScanLines.
//
// Split panics if it is called after scanning has started.
// It is public so that code outside this class can configure a scanner.
public static void Split(this ptr<Scanner> _addr_s, SplitFunc split) => func((_, panic, _) => {
    ref Scanner s = ref _addr_s.val;

    if (s.scanCalled) {
        panic("Split called after Scan");
    }
    s.split = split;
});

// Split functions

// ScanBytes is a split function for a Scanner that hands back the input
// one byte at a time, each byte being its own token.
public static (nint, slice<byte>, error) ScanBytes(slice<byte> data, bool atEOF) {
    if (!atEOF || len(data) != 0) {
        // Consume one byte and return it as the token.
        return (1, data[(int)0..(int)1], error.As(null!)!);
    }
    // Input exhausted: no advance, no token, no error.
    return (0, null, error.As(null!)!);
}

// errorRune holds the bytes of utf8.RuneError; ScanRunes returns it as the
// token when the input at the current position is not a valid encoding.
private static slice<byte> errorRune = (slice<byte>)string(utf8.RuneError);

// ScanRunes is a split function for a Scanner that returns each
// UTF-8-encoded rune as a token. The sequence of runes returned is
// equivalent to that from a range loop over the input as a string, which
// means that erroneous UTF-8 encodings translate to U+FFFD = "\xef\xbf\xbd".
// Because of the Scan interface, this makes it impossible for the client to
// distinguish correctly encoded replacement runes from encoding errors.
public static (nint, slice<byte>, error) ScanRunes(slice<byte> data, bool atEOF) {
    if (len(data) == 0 && atEOF) {
        return (0, null, error.As(null!)!);
    }

    // Fast path: a single-byte (ASCII) rune needs no decoding.
    if (data[0] < utf8.RuneSelf) {
        return (1, data[(int)0..(int)1], error.As(null!)!);
    }

    var (_, size) = utf8.DecodeRune(data);
    if (size > 1) {
        // A width above one means the encoding is valid; a correctly encoded
        // non-ASCII rune can never have width one.
        return (size, data[(int)0..(int)size], error.As(null!)!);
    }

    if (atEOF || utf8.FullRune(data)) {
        // Genuine encoding error: consume one byte and emit the replacement rune.
        return (1, errorRune, error.As(null!)!);
    }

    // The rune is merely incomplete; ask for more bytes.
    return (0, null, error.As(null!)!);
}

// dropCR returns data without its final byte when that byte is '\r';
// otherwise it returns data unchanged.
private static slice<byte> dropCR(slice<byte> data) {
    var last = len(data) - 1;
    if (last < 0 || data[last] != '\r') {
        return data;
    }
    return data[(int)0..(int)last];
}

// ScanLines is a split function for a Scanner that returns each line of
// text, stripped of any trailing end-of-line marker. The returned line may
// be empty. The end-of-line marker is one optional carriage return followed
// by one mandatory newline. In regular expression notation, it is `\r?\n`.
// The last non-empty line of input will be returned even if it has no
// newline.
public static (nint, slice<byte>, error) ScanLines(slice<byte> data, bool atEOF) {
    if (len(data) == 0 && atEOF) {
        return (0, null, error.As(null!)!);
    }

    var newline = bytes.IndexByte(data, '\n');
    if (newline < 0) {
        if (!atEOF) {
            // No terminator yet; request more data.
            return (0, null, error.As(null!)!);
        }
        // At EOF the remaining bytes form a final, unterminated line.
        return (len(data), dropCR(data), error.As(null!)!);
    }

    // A complete newline-terminated line: consume it including the '\n'.
    return (newline + 1, dropCR(data[(int)0..(int)newline]), error.As(null!)!);
}

// isSpace reports whether the character is a Unicode white space character.
// We avoid dependency on the unicode package, but check validity of the implementation
// in the tests.
private static bool isSpace(int r) {
    if (r <= '\u00FF') {
        // '\t' through '\r' are contiguous; add space and the two Latin-1 oddballs.
        return (r >= '\t' && r <= '\r')
            || r == ' '
            || r == '\u0085'
            || r == '\u00A0';
    }

    // U+2000 .. U+200A is a contiguous run of space characters.
    if (r >= '\u2000' && r <= '\u200a') {
        return true;
    }

    // The remaining isolated space code points.
    return r == '\u1680'
        || r == '\u2028'
        || r == '\u2029'
        || r == '\u202f'
        || r == '\u205f'
        || r == '\u3000';
}

// ScanWords is a split function for a Scanner that returns each
// space-separated word of text, with surrounding spaces deleted. It will
// never return an empty string. The definition of space is set by
// unicode.IsSpace.
//
// The returned advance count covers the leading spaces, the word and the
// single space rune that terminates it. When no complete word is available
// yet, the leading spaces are still consumed and a nil token is returned.
public static (nint, slice<byte>, error) ScanWords(slice<byte> data, bool atEOF) {
    // Skip leading spaces. The offset is advanced after every space rune;
    // the loop stops at the first non-space rune.
    nint start = 0;
    while (start < len(data)) {
        var (r, width) = utf8.DecodeRune(data[(int)start..]);
        if (!isSpace(r)) {
            break;
        }
        start += width;
    }
    // Scan until space, marking end of word. The offset is advanced after
    // every non-space rune so the loop always makes progress.
    nint i = start;
    while (i < len(data)) {
        var (r, width) = utf8.DecodeRune(data[(int)i..]);
        if (isSpace(r)) {
            return (i + width, data[(int)start..(int)i], error.As(null!)!);
        }
        i += width;
    }
    // If we're at EOF, we have a final, non-empty, non-terminated word. Return it.
    if (atEOF && len(data) > start) {
        return (len(data), data[(int)start..], error.As(null!)!);
    }
    // Request more data.
    return (start, null, error.As(null!)!);
}

} // end bufio_package
